#load packages
#assumption check function
overdisp_fun <- function(model) {
rdf <- df.residual(model)
rp <- residuals(model,type="pearson")
Pearson.chisq <- sum(rp^2)
prat <- Pearson.chisq/rdf
pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
model_check <- function(model_name){
hist(resid(model_name))
overdisp_fun(model_name)
plot(fitted(model_name), resid(model_name)) #residuals vs fitted
abline(h=0)
sim_res <- simulateResiduals(model_name, n = 1000)
# QQ plot
plotQQunif(sim_res) # QQ plot for simulated residuals
plot(sim_res)
testZeroInflation(sim_res)
print(overdisp_fun(model_name))
}
#load data
## Rows: 2191051 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): text, speaker, party, parties_in_government
## dbl (4): paragraph, sentence_nr, prediction, confidence
## lgl (1): current_speaker_in_government
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Make personal leaderboard
blame_leaderboard <- inf_data_filtered %>%
group_by(speaker) %>%
summarise(total_blame = sum(prediction),
n_sent = n(),
blame_percentage = (total_blame/n_sent)*100,
parties = list(unique(party))) %>%
filter(n_sent>=1000) %>%
ungroup() %>%
arrange(desc(blame_percentage))
head(blame_leaderboard, 10)
## # A tibble: 10 × 5
## speaker total_blame n_sent blame_percentage parties
## <chr> <dbl> <int> <dbl> <list>
## 1 Søren Krarup 1051 6075 17.3 <fct [1]>
## 2 Pernille Vermund 817 5090 16.1 <fct [1]>
## 3 Jesper Langballe 779 5469 14.2 <fct [1]>
## 4 Nikolaj Villumsen 659 4751 13.9 <fct [1]>
## 5 Mette Thiesen 671 5490 12.2 <fct [1]>
## 6 Helge Adam Møller 539 4499 12.0 <fct [1]>
## 7 Villy Søvndal 1905 15973 11.9 <fct [1]>
## 8 Margrete Auken 663 5604 11.8 <fct [1]>
## 9 Holger K. Nielsen 1439 12628 11.4 <fct [1]>
## 10 Kenneth Kristensen Berth 364 3227 11.3 <fct [1]>
#Make party leaderboard
party_leaderboard <- inf_data_filtered %>%
group_by(party) %>%
summarise(party_n_sent = n(),
party_blame = sum(prediction),
party_blame_percentage = (party_blame/party_n_sent)*100) %>%
filter(party_n_sent>=1000) %>%
arrange(desc(party_blame_percentage)) %>%
ungroup()
head(party_leaderboard, 12)
## # A tibble: 12 × 4
## party party_n_sent party_blame party_blame_percentage
## <fct> <int> <dbl> <dbl>
## 1 NB 17820 2015 11.3
## 2 FG 1879 159 8.46
## 3 EL 212044 16218 7.65
## 4 DF 232741 16714 7.18
## 5 SF 189120 13486 7.13
## 6 LA 64586 4051 6.27
## 7 RV 136331 7897 5.79
## 8 S 389092 21178 5.44
## 9 KF 181948 9282 5.10
## 10 KD 24077 1210 5.03
## 11 V 362274 17168 4.74
## 12 ALT 31299 1259 4.02
#show daily/montly counts of blame pr party and in total #Show daily/monthly relative amounts of blame pr party #Both of the above show also in regards to the amount of sentences by the party
#Put in lines of personal political scandales and global times of crisis
#Make model showing differences in amount of blame for center parties vs right/left wing #The same for opposition do the opposition always shwo more blame (here also interaction between right/left wing) (glmer with random intercept?)
## `summarise()` has grouped output by 'month'. You can override using the
## `.groups` argument.
#Monthly
ggplot(monthly_blame, aes(x = month, y = total_blame, color = party)) +
geom_smooth( span = 0.5, se = FALSE) + # per-party trends
geom_smooth(aes(x = month, y = total_blame), # global trend
color = "black", linetype = "dashed",
se = FALSE, span = 0.5) +
labs(
title = "Monthly blame Over Time by Party with Global Trend",
x = "Month",
y = "Total Blame"
) +
theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.00083333
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.6
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group 2.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#DO the above as a proportion of sentences uttered
#Lidstone smoothing, additive smoothing
monthly_blame <- monthly_blame %>%
mutate(offset_ratio = (total_blame+0.001)/(total_nr_sentences+0.001))
ggplot(monthly_blame, aes(x = month, y = offset_ratio, color = party)) +
geom_smooth(span = 0.5, se = FALSE) + # per-party trends
geom_smooth(aes(x = month, y = offset_ratio), # global trend
color = "black", linetype = "dashed",
se = FALSE, span = 0.5) +
labs(
title = "Monthly Blame Over Time by Party with Global Trend",
subtitle = "controlled by number of sentences uttered",
x = "Month",
y = "Blame pr sentence"
) +
theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.00083333
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.6
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group 2.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#same as above but with glm
ggplot(monthly_blame, aes(x = month, y = total_nr_sentences, color = party)) +
geom_smooth(span = 0.5, se = FALSE) + # per-party trends
geom_smooth(aes(x = month, y = total_nr_sentences), # global trend
color = "black", linetype = "dashed",
se = FALSE, span = 0.5) +
labs(
title = "Number of uttered sentences monthly",
x = "Month",
y = "Total sentences"
) +
theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.00083333
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.6
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group 2.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#Show number of sentences pr year
ggplot(monthly_blame, aes(x = month, y = total_nr_sentences))+
geom_smooth(aes(x = month, y = total_nr_sentences), span = 0.5, se = FALSE)+
labs(
title = "Number of uttered sentences monthly",
x = "Month",
y = "Total sentences"
) +
theme_minimal()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
In order to make it easier to do statistics and interpretation.Focus will not be on specific parties, but on left/right and center/wing
right_wing_parties <- c("V", "KF", "LA", "KD", "DF", "NB", "DD")
center_parties <- c("S", "V", "KF")
monthly_blame$block <- ifelse(monthly_blame$party %in% right_wing_parties, "Right", "Left")
monthly_blame$wing <- ifelse(monthly_blame$party %in% center_parties, "Center", "Wing")
monthly_blame$wing <- as.factor(monthly_blame$wing)
monthly_blame$block <- as.factor(monthly_blame$block)
monthly_blame$in_gov <- as.factor(monthly_blame$in_gov)
monthly_blame$month_index <- 12 * (as.numeric(format(monthly_blame$month, "%Y")) - 2000) + (as.numeric(format(monthly_blame$month, "%m")) - 1)
str(monthly_blame) #all good
## tibble [2,330 × 10] (S3: tbl_df/tbl/data.frame)
## $ month : 'yearmon' num [1:2330] Jan 2000 Jan 2000 Jan 2000 Jan 2000 ...
## $ party : Factor w/ 30 levels "-","ALT","CD",..: 5 6 13 14 22 23 24 30 5 6 ...
## $ in_gov : Factor w/ 2 levels "FALSE","TRUE": 1 1 1 1 2 2 1 1 1 1 ...
## $ total_nr_sentences: int [1:2330] 817 585 234 259 815 993 469 303 715 783 ...
## $ total_blame : num [1:2330] 67 26 16 20 40 41 34 15 18 57 ...
## $ year : num [1:2330] 0 0 0 0 0 0 0 0 0 0 ...
## $ offset_ratio : num [1:2330] 0.082 0.0444 0.0684 0.0772 0.0491 ...
## $ block : Factor w/ 2 levels "Left","Right": 2 1 2 2 1 1 1 2 2 1 ...
## $ wing : Factor w/ 2 levels "Center","Wing": 2 2 2 1 2 1 2 1 2 2 ...
## $ month_index : num [1:2330] 0 0 0 0 0 0 0 0 1 1 ...
#Blame predicted by time
model_blame_time_1 <- glmmTMB(total_blame ~ 1 + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom2,
data = monthly_blame)
model_blame_time_2 <- glmmTMB(total_blame ~ month_index + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom2,
data = monthly_blame)
summary(model_blame_time_2)
## Family: nbinom2 ( log )
## Formula:
## total_blame ~ month_index + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18997.7 19026.5 -9493.9 18987.7 2325
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.06931 0.2633
## in_gov (Intercept) 0.03542 0.1882
## Number of obs: 2330, groups: party, 13; in_gov, 2
##
## Dispersion parameter for nbinom2 family (): 7.34
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.8013871 0.1548944 -18.086 < 2e-16 ***
## month_index -0.0005429 0.0001121 -4.842 1.29e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_blame_time_3 <- glmmTMB(total_blame ~ scale(month_index) + I(scale(month_index)^2) + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom2,
data = monthly_blame)
summary(model_blame_time_3)
## Family: nbinom2 ( log )
## Formula: total_blame ~ scale(month_index) + I(scale(month_index)^2) +
## (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18998.5 19033.0 -9493.3 18986.5 2324
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.06764 0.2601
## in_gov (Intercept) 0.03505 0.1872
## Number of obs: 2330, groups: party, 13; in_gov, 2
##
## Dispersion parameter for nbinom2 family (): 7.33
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.894504 0.153249 -18.888 < 2e-16 ***
## scale(month_index) -0.042871 0.009339 -4.590 4.42e-06 ***
## I(scale(month_index)^2) 0.011431 0.010435 1.095 0.273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_blame_time_1, model_blame_time_2, model_blame_time_3)
## Data: monthly_blame
## Models:
## model_blame_time_1: total_blame ~ 1 + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_blame_time_2: total_blame ~ month_index + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_blame_time_3: total_blame ~ scale(month_index) + I(scale(month_index)^2) + , zi=~0, disp=~1
## model_blame_time_3: (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_blame_time_1 4 19019 19042 -9505.5 19011
## model_blame_time_2 5 18998 19026 -9493.9 18988 23.2128 1 1.45e-06
## model_blame_time_3 6 18998 19033 -9493.3 18986 1.2022 1 0.2729
##
## model_blame_time_1
## model_blame_time_2 ***
## model_blame_time_3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Plot best model
#install.packages("sjPlot")
library(sjPlot)
sjPlot::plot_model(model_blame_time_2, type = "re")
## [[1]]
##
## [[2]]
#Due to the offset, in order to do meaningfull interpretation, the total number of sentences must be kept constant. Thus the following plot shows the change in predicted amount of blame pr 1000 sentences uttered as time goes on
sjPlot::plot_model(model_blame_time_2,
type = "pred",
terms = "month_index",
condition = c(total_nr_sentences = 100),
axis.title = c("Months after Jan 2000", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "General Evolution of Blame in the Danish Parliament over time",
ci.lvl = NA,
axis.lim = c(3,7)
) +
theme_minimal()
## You are calculating adjusted predictions on the population-level (i.e.
## `type = "fixed"`) for a *generalized* linear mixed model.
## This may produce biased estimates due to Jensen's inequality. Consider
## setting `bias_correction = TRUE` to correct for this bias.
## See also the documentation of the `bias_correction` argument.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
#shows downtrend over time
#check assumptions
model_check(model_blame_time_2)
## chisq ratio rdf p
## 2.517030e+03 1.082594e+00 2.325000e+03 2.963630e-03
model_intercept <- glmmTMB(total_blame ~ 1 + (1|party) + (1|month_index)+ offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame)
summary(model_intercept)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ 1 + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18732.1 18755.1 -9362.0 18724.1 2326
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.07830 0.2798
## month_index (Intercept) 0.05222 0.2285
## Number of obs: 2330, groups: party, 13; month_index, 274
##
## Dispersion parameter for nbinom1 family (): 3.8
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.77856 0.08258 -33.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_in_gov <- glmmTMB(total_blame ~ in_gov + (1|party) + (1|month_index)+ offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame)
summary(model_in_gov)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18408.4 18437.1 -9199.2 18398.4 2325
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.06295 0.2509
## month_index (Intercept) 0.05298 0.2302
## Number of obs: 2330, groups: party, 13; month_index, 274
##
## Dispersion parameter for nbinom1 family (): 3.14
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.74565 0.07445 -36.88 <2e-16 ***
## in_govTRUE -0.37098 0.01985 -18.69 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_block <- glmmTMB(total_blame ~ in_gov + block + (1|party) + (1|month_index) + offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame)
summary(model_block)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + block + (1 | party) + (1 | month_index) +
## offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18410.4 18444.9 -9199.2 18398.4 2324
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.06295 0.2509
## month_index (Intercept) 0.05298 0.2302
## Number of obs: 2330, groups: party, 13; month_index, 274
##
## Dispersion parameter for nbinom1 family (): 3.14
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.75539 0.10680 -25.800 <2e-16 ***
## in_govTRUE -0.37098 0.01985 -18.688 <2e-16 ***
## blockRight 0.01850 0.14572 0.127 0.899
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_block_int <- glmmTMB(total_blame ~ in_gov + block + in_gov*block + (1|party) + (1|month_index)+ offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame)
summary(model_block_int)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + block + in_gov * block + (1 | party) +
## (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18399.7 18440.0 -9192.8 18385.7 2323
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.06153 0.2481
## month_index (Intercept) 0.05271 0.2296
## Number of obs: 2330, groups: party, 13; month_index, 274
##
## Dispersion parameter for nbinom1 family (): 3.12
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.748354 0.105649 -26.014 < 2e-16 ***
## in_govTRUE -0.444118 0.028757 -15.444 < 2e-16 ***
## blockRight -0.001699 0.144256 -0.012 0.990604
## in_govTRUE:blockRight 0.177976 0.050112 3.552 0.000383 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_wing_block <- glmmTMB(total_blame ~ in_gov + block + in_gov * block + wing + (1|party) + (1|month_index) + offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame)
summary(model_wing_block)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + block + in_gov * block + wing + (1 | party) +
## (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 18399.7 18445.8 -9191.9 18383.7 2322
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.05280 0.2298
## month_index (Intercept) 0.05272 0.2296
## Number of obs: 2330, groups: party, 13; month_index, 274
##
## Dispersion parameter for nbinom1 family (): 3.12
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.93525 0.16169 -18.154 < 2e-16 ***
## in_govTRUE -0.44354 0.02876 -15.423 < 2e-16 ***
## blockRight 0.02874 0.13591 0.211 0.832521
## wingWing 0.22576 0.15634 1.444 0.148720
## in_govTRUE:blockRight 0.17926 0.05013 3.576 0.000349 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_intercept, model_in_gov, model_block, model_block_int, model_wing_block)
## Data: monthly_blame
## Models:
## model_intercept: total_blame ~ 1 + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_in_gov: total_blame ~ in_gov + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_block: total_blame ~ in_gov + block + (1 | party) + (1 | month_index) + , zi=~0, disp=~1
## model_block: offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_block_int: total_blame ~ in_gov + block + in_gov * block + (1 | party) + , zi=~0, disp=~1
## model_block_int: (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_wing_block: total_blame ~ in_gov + block + in_gov * block + wing + (1 | party) + , zi=~0, disp=~1
## model_wing_block: (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_intercept 4 18732 18755 -9362.0 18724
## model_in_gov 5 18408 18437 -9199.2 18398 325.6978 1 < 2.2e-16 ***
## model_block 6 18410 18445 -9199.2 18398 0.0161 1 0.8989455
## model_block_int 7 18400 18440 -9192.8 18386 12.6684 1 0.0003719 ***
## model_wing_block 8 18400 18446 -9191.9 18384 1.9602 1 0.1614959
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_in_gov, model_block_int)
## Data: monthly_blame
## Models:
## model_in_gov: total_blame ~ in_gov + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_block_int: total_blame ~ in_gov + block + in_gov * block + (1 | party) + , zi=~0, disp=~1
## model_block_int: (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_in_gov 5 18408 18437 -9199.2 18398
## model_block_int 7 18400 18440 -9192.8 18386 12.684 2 0.00176 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check assumptions of best model
model_check(model_block_int)
## chisq ratio rdf p
## 2081.4537287 0.8960197 2323.0000000 0.9998762
#SOme deviation, but cannot be made better
#plot best model (Intercept) -2.748347 0.105649 -26.014 < 2e-16
in_govTRUE -0.444119 0.028757 -15.444 < 2e-16
blockRight -0.001707 0.144255 -0.012 0.990559
in_govTRUE:blockRight 0.177978 0.050112 3.552 0.000383 ***
sjPlot::plot_model(model_block_int,
type = "int",
#terms = c("in_gov", "block"),
condition = c(total_nr_sentences = 100),
axis.title = c("In government", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "Blame Predicted by Government and Block",
ci.lvl = NA,
show.legend = TRUE,
axis.lim = c(3,7)
) +
theme_minimal()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
sjPlot::plot_model(model_block_int,
type = "pred",
terms = c("block"),
condition = c(total_nr_sentences = 100),
axis.title = c("In government", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "Blame Predicted by Block",
ci.lvl = NA,
axis.lim = c(3,7)
) +
theme_minimal()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
#The first plot shows clear differences in government along with the positive interaction term as illustrated by the different nature of black by the levels of government
#The second plot shows the lack of influence by block alone as found by the model
#Something seems to happen (see the time_series plot) in recent years, thus data since 2019 will also be evaluated (nye Borgerlige enters the room)
#Make data
monthly_blame_18 <- monthly_blame %>%
filter(year >=19) %>%
mutate(month_index = month_index-227)
#plot data
ggplot(monthly_blame_18, aes(x = month, y = offset_ratio, color = party)) +
geom_smooth(span = 0.5, se = FALSE) + # per-party trends
geom_smooth(aes(x = month, y = offset_ratio), # global trend
color = "black", linetype = "dashed",
se = FALSE, span = 0.3) +
labs(
title = "Monthly blame Over Time by Party with Global Trend",
x = "Month",
y = "Blame pr sentence"
) +
theme_minimal()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2022.4
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.00083333
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at 2022.6
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 6.9444e-07
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group 2.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#Do analysis `
model_intercept_18 <- glmmTMB(total_blame ~ 1 + (1|party) + (1|month_index)+ offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame_18)
summary(model_intercept_18)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ 1 + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3777.0 3794.0 -1884.5 3769.0 504
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.07798 0.2792
## month_index (Intercept) 0.05893 0.2428
## Number of obs: 508, groups: party, 13; month_index, 48
##
## Dispersion parameter for nbinom1 family (): 2.15
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.74911 0.08866 -31.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_in_gov_18 <- glmmTMB(total_blame ~ in_gov + (1|party) + (1|month_index)+ offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame_18)
summary(model_in_gov_18)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3759.5 3780.6 -1874.7 3749.5 503
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.05139 0.2267
## month_index (Intercept) 0.05870 0.2423
## Number of obs: 508, groups: party, 13; month_index, 48
##
## Dispersion parameter for nbinom1 family (): 2.05
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.72447 0.07605 -35.82 < 2e-16 ***
## in_govTRUE -0.35779 0.07945 -4.50 6.7e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_block_18 <- glmmTMB(total_blame ~ in_gov + block + (1|party) + (1|month_index) + offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame_18)
summary(model_block_18)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + block + (1 | party) + (1 | month_index) +
## offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3756.6 3782.0 -1872.3 3744.6 502
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.03331 0.1825
## month_index (Intercept) 0.05845 0.2418
## Number of obs: 508, groups: party, 13; month_index, 48
##
## Dispersion parameter for nbinom1 family (): 2.05
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.87114 0.08823 -32.54 < 2e-16 ***
## in_govTRUE -0.35356 0.07766 -4.55 5.3e-06 ***
## blockRight 0.26927 0.11060 2.43 0.0149 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_wing_block_18 <- glmmTMB(total_blame ~ in_gov + wing + block + (1|party) + (1|month_index) + offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame_18)
summary(model_wing_block_18)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + wing + block + (1 | party) + (1 | month_index) +
## offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3748.0 3777.6 -1867.0 3734.0 501
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.01110 0.1054
## month_index (Intercept) 0.05819 0.2412
## Number of obs: 508, groups: party, 13; month_index, 48
##
## Dispersion parameter for nbinom1 family (): 2.06
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.17631 0.09475 -33.52 < 2e-16 ***
## in_govTRUE -0.28184 0.07696 -3.66 0.00025 ***
## wingWing 0.34629 0.08270 4.19 2.82e-05 ***
## blockRight 0.33845 0.07221 4.69 2.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_block_int_18 <- glmmTMB(total_blame ~ in_gov + block + in_gov * block + (1|party) + (1|month_index) + offset(log(total_nr_sentences)),
family = nbinom1,
data = monthly_blame_18)
summary(model_block_int_18)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + block + in_gov * block + (1 | party) +
## (1 | month_index) + offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3757.9 3787.5 -1871.9 3743.9 501
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.03241 0.1800
## month_index (Intercept) 0.05920 0.2433
## Number of obs: 508, groups: party, 13; month_index, 48
##
## Dispersion parameter for nbinom1 family (): 2.04
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.86226 0.08801 -32.52 < 2e-16 ***
## in_govTRUE -0.41292 0.10202 -4.05 5.18e-05 ***
## blockRight 0.25749 0.11009 2.34 0.0193 *
## in_govTRUE:blockRight 0.15552 0.17963 0.87 0.3866
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_intercept_18, model_in_gov_18, model_block_18, model_wing_block_18, model_block_int_18)
## Data: monthly_blame_18
## Models:
## model_intercept_18: total_blame ~ 1 + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_in_gov_18: total_blame ~ in_gov + (1 | party) + (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_block_18: total_blame ~ in_gov + block + (1 | party) + (1 | month_index) + , zi=~0, disp=~1
## model_block_18: offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_wing_block_18: total_blame ~ in_gov + wing + block + (1 | party) + (1 | month_index) + , zi=~0, disp=~1
## model_wing_block_18: offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_block_int_18: total_blame ~ in_gov + block + in_gov * block + (1 | party) + , zi=~0, disp=~1
## model_block_int_18: (1 | month_index) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model_intercept_18 4 3777.0 3794.0 -1884.5 3769.0
## model_in_gov_18 5 3759.5 3780.6 -1874.7 3749.5 19.5800 1 9.647e-06
## model_block_18 6 3756.6 3782.0 -1872.3 3744.6 4.8519 1 0.027616
## model_wing_block_18 7 3748.0 3777.6 -1867.0 3734.0 10.6157 1 0.001121
## model_block_int_18 7 3757.9 3787.5 -1871.9 3743.9 0.0000 0 1.000000
##
## model_intercept_18
## model_in_gov_18 ***
## model_block_18 *
## model_wing_block_18 **
## model_block_int_18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check assumption of best model
model_check(model_wing_block_18)
## chisq ratio rdf p
## 458.0712331 0.9143138 501.0000000 0.9155503
#Plot best model
summary(model_wing_block_18)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ in_gov + wing + block + (1 | party) + (1 | month_index) +
## offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3748.0 3777.6 -1867.0 3734.0 501
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.01110 0.1054
## month_index (Intercept) 0.05819 0.2412
## Number of obs: 508, groups: party, 13; month_index, 48
##
## Dispersion parameter for nbinom1 family (): 2.06
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.17631 0.09475 -33.52 < 2e-16 ***
## in_govTRUE -0.28184 0.07696 -3.66 0.00025 ***
## wingWing 0.34629 0.08270 4.19 2.82e-05 ***
## blockRight 0.33845 0.07221 4.69 2.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::plot_model(model_wing_block_18,
type = "pred",
terms = c("in_gov", "wing"),
condition = c(total_nr_sentences = 100),
axis.title = c("In Government", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "Predicted Blame by In Government and Wing in period 2019-2022",
ci.lvl = NA,
axis.lim = c(3,7)
) +
theme_minimal()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
sjPlot::plot_model(model_wing_block_18,
type = "pred",
terms = c("block"),
condition = c(total_nr_sentences = 100),
axis.title = c("Block", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "Predicted Blame by Block in period 2019-2022",
ci.lvl = NA,
axis.lim = c(3,6.5)
) +
theme_minimal()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
While the general amount of blame has not changed according to this paper, it seems like the polarization has become more clear than before with also wing and block in addition to in_gov being significant predictors of
(Intercept) -3.16130 0.09924 -31.86 < 2e-16 in_govTRUE -0.27310 0.08655 -3.16 0.0016 wingWing 0.34538 0.08697 3.97 7.15e-05 blockRight 0.32434 0.07422 4.37 1.24e-05 *
#time as predictor of blame in recent years
model_blame_time_18_intercept <- glmmTMB(total_blame ~ 1 + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom2,
data = monthly_blame_18)
summary(model_blame_time_18_intercept)
## Family: nbinom2 ( log )
## Formula:
## total_blame ~ 1 + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3934.1 3951.0 -1963.1 3926.1 504
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.05882 0.2425
## in_gov (Intercept) 0.03217 0.1794
## Number of obs: 508, groups: party, 13; in_gov, 2
##
## Dispersion parameter for nbinom2 family (): 7.63
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.836 0.156 -18.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_blame_time_18 <- glmmTMB(total_blame ~ month_index + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom2,
data = monthly_blame_18)
summary(model_blame_time_18)
## Family: nbinom2 ( log )
## Formula:
## total_blame ~ month_index + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3925.2 3946.4 -1957.6 3915.2 503
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.05319 0.2306
## in_gov (Intercept) 0.03281 0.1811
## Number of obs: 508, groups: party, 13; in_gov, 2
##
## Dispersion parameter for nbinom2 family (): 7.85
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.967129 0.159502 -18.602 < 2e-16 ***
## month_index 0.004711 0.001420 3.318 0.000906 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_blame_time_18_2 <- glmmTMB(total_blame ~ month_index + I(month_index^2) + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom2,
data = monthly_blame_18)
summary(model_blame_time_18_2)
## Family: nbinom2 ( log )
## Formula: total_blame ~ month_index + I(month_index^2) + (1 | party) +
## (1 | in_gov) + offset(log(total_nr_sentences))
## Data: monthly_blame_18
##
## AIC BIC logLik -2*log(L) df.resid
## 3915.8 3941.2 -1951.9 3903.8 502
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.05340 0.2311
## in_gov (Intercept) 0.03367 0.1835
## Number of obs: 508, groups: party, 13; in_gov, 2
##
## Dispersion parameter for nbinom2 family (): 8.22
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.1216246 0.1669553 -18.697 < 2e-16 ***
## month_index 0.0227305 0.0055630 4.086 4.39e-05 ***
## I(month_index^2) -0.0003686 0.0001135 -3.248 0.00116 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_blame_time_18_intercept,model_blame_time_18, model_blame_time_18_2)
## Data: monthly_blame_18
## Models:
## model_blame_time_18_intercept: total_blame ~ 1 + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_blame_time_18: total_blame ~ month_index + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## model_blame_time_18_2: total_blame ~ month_index + I(month_index^2) + (1 | party) + , zi=~0, disp=~1
## model_blame_time_18_2: (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df
## model_blame_time_18_intercept 4 3934.1 3951.0 -1963.0 3926.1
## model_blame_time_18 5 3925.2 3946.4 -1957.6 3915.2 10.903 1
## model_blame_time_18_2 6 3915.8 3941.2 -1951.9 3903.8 11.406 1
## Pr(>Chisq)
## model_blame_time_18_intercept
## model_blame_time_18 0.0009599 ***
## model_blame_time_18_2 0.0007321 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model_check(model_blame_time_18_2)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
## chisq ratio rdf p
## 544.66104512 1.08498216 502.00000000 0.09160867
Cannot fix assummptions here
#Plot this model
sjPlot::plot_model(model_blame_time_18_2,
type = "pred",
terms = c("month_index"),
condition = c(total_nr_sentences = 100),
axis.title = c("Month since Jan 2019", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "Evolution of the Use of Blame in period 2019-2022",
ci.lvl = NA,
axis.lim = c(3,7)
) +
theme_minimal()
## Model uses a transformed offset term. Predictions may not be correct.
## It is recommended to fix the offset term using the `condition` argument,
## e.g. `condition = c(party = 1)`.
## You could also transform the offset variable before fitting the model
## and use `offset(party)` in the model formula.
## Model contains polynomial or cubic / quadratic terms. Consider using
## `terms="month_index [all]"` to get smooth plots. See also
## package-vignette 'Adjusted Predictions at Specific Values'.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
#check assumption
model_check(model_blame_time_18_2)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
## chisq ratio rdf p
## 544.66104512 1.08498216 502.00000000 0.09160867
#Now the trend is turning and in the last few years it has been going up
# Define election months
election_months <- as.yearmon(c("Feb 2005", "Nov 2007", "Sep 2011", "Jun 2015", "Jun 2019"))
monthly_blame$time_to_election <- ifelse(monthly_blame$month %in% election_months, 0, NA )
# Assume monthly_blame$month is also yearmon
# If not, convert:
# monthly_blame$month <- as.yearmon(monthly_blame$month)
# Function to compute time to nearest election
time_to_nearest_election <- function(month, elections, window = 12) {
# compute differences in months
diffs <- as.numeric((month - elections) * 12)
# find the nearest election
closest <- diffs[which.min(abs(diffs))]
# return NA if farther than ±window months
if (abs(closest) > window) {
return(NA_real_)
} else {
return(+closest) # negative before, positive after (flip sign for intuition)
}
}
# Apply function to each month
monthly_blame <- monthly_blame %>%
mutate(time_since_election = sapply(month, time_to_nearest_election, elections = election_months))
election_monthly_blame <- monthly_blame %>%
filter(!is.na(time_since_election))
election_model_intercept <- glmmTMB(total_blame ~ 1 + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom1,
data = election_monthly_blame)
summary(election_model_intercept)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ 1 + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences))
## Data: election_monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 8258.7 8278.4 -4125.4 8250.7 1015
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.04387 0.2095
## in_gov (Intercept) 0.01920 0.1386
## Number of obs: 1019, groups: party, 11; in_gov, 2
##
## Dispersion parameter for nbinom1 family (): 5.68
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.8999 0.1195 -24.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
election_model <- glmmTMB(total_blame ~ time_since_election + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom1,
data = election_monthly_blame)
summary(election_model)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ time_since_election + (1 | party) + (1 | in_gov) +
## offset(log(total_nr_sentences))
## Data: election_monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 8254.9 8279.5 -4122.4 8244.9 1014
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.04144 0.2036
## in_gov (Intercept) 0.01971 0.1404
## Number of obs: 1019, groups: party, 11; in_gov, 2
##
## Dispersion parameter for nbinom1 family (): 5.64
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.902868 0.119578 -24.276 <2e-16 ***
## time_since_election 0.003809 0.001570 2.426 0.0153 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
election_model_quad <- glmmTMB(total_blame ~ time_since_election + I(time_since_election^2) + (1|party) + (1|in_gov) + offset(log(total_nr_sentences)),
family = nbinom1,
data = election_monthly_blame)
## Warning in (function (start, objective, gradient = NULL, hessian = NULL, :
## NA/NaN function evaluation
summary(election_model_quad)
## Family: nbinom1 ( log )
## Formula:
## total_blame ~ time_since_election + I(time_since_election^2) +
## (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences))
## Data: election_monthly_blame
##
## AIC BIC logLik -2*log(L) df.resid
## 8251.7 8281.3 -4119.9 8239.7 1013
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## party (Intercept) 0.04131 0.2033
## in_gov (Intercept) 0.02029 0.1424
## Number of obs: 1019, groups: party, 11; in_gov, 2
##
## Dispersion parameter for nbinom1 family (): 5.58
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9362358 0.1216146 -24.144 <2e-16 ***
## time_since_election 0.0036564 0.0015348 2.382 0.0172 *
## I(time_since_election^2) 0.0005435 0.0002393 2.272 0.0231 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(election_model_intercept,election_model, election_model_quad)
## Data: election_monthly_blame
## Models:
## election_model_intercept: total_blame ~ 1 + (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## election_model: total_blame ~ time_since_election + (1 | party) + (1 | in_gov) + , zi=~0, disp=~1
## election_model: offset(log(total_nr_sentences)), zi=~0, disp=~1
## election_model_quad: total_blame ~ time_since_election + I(time_since_election^2) + , zi=~0, disp=~1
## election_model_quad: (1 | party) + (1 | in_gov) + offset(log(total_nr_sentences)), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df
## election_model_intercept 4 8258.7 8278.4 -4125.4 8250.7
## election_model 5 8254.9 8279.5 -4122.4 8244.9 5.8790 1
## election_model_quad 6 8251.7 8281.3 -4119.9 8239.7 5.1088 1
## Pr(>Chisq)
## election_model_intercept
## election_model 0.01532 *
## election_model_quad 0.02381 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check assumptions
model_check(election_model_quad)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
## chisq ratio rdf p
## 996.1549246 0.9833711 1013.0000000 0.6411390
ggplot(election_monthly_blame, aes(x = time_since_election, y=offset_ratio, color = party)) +
geom_smooth()+
geom_smooth(aes(x = time_since_election, y=offset_ratio), color = "black", linetype = "dashed")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
model_check(election_model_quad)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
## chisq ratio rdf p
## 996.1549246 0.9833711 1013.0000000 0.6411390
#plot best model
sjPlot::plot_model(election_model_quad,
type = "pred",
terms = "time_since_election[all]",
condition = c(total_nr_sentences = 100),
axis.title = c("Months Since Nearest Election", "Predicted Blame Counts pr 100 Uttered Sentences"),
title = "Predicted Counts of blame by time since nearest election ",
ci.lvl = NA,
axis.lim = c(3,7)
) +
theme_minimal()
## Model uses a transformed offset term. Predictions may not be correct.
## It is recommended to fix the offset term using the `condition` argument,
## e.g. `condition = c(party = 1)`.
## You could also transform the offset variable before fitting the model
## and use `offset(party)` in the model formula.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
names(monthly_blame)
## [1] "month" "party" "in_gov"
## [4] "total_nr_sentences" "total_blame" "year"
## [7] "offset_ratio" "block" "wing"
## [10] "month_index" "time_to_election" "time_since_election"
length(monthly_blame$month)
## [1] 2330
min(monthly_blame$month_index)
## [1] 0